import torch

# Suppose we have a model
class Model(torch.nn.Module):
    """Two-layer fully connected network: Linear(10, 5) -> ReLU -> Linear(5, 3)."""

    def __init__(self) -> None:
        super().__init__()
        # layer1 is created before layer2, so parameter initialisation
        # consumes the random number generator in a fixed order.
        self.layer1 = torch.nn.Linear(in_features=10, out_features=5)
        self.layer2 = torch.nn.Linear(in_features=5, out_features=3)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply ``layer1``, a ReLU, then ``layer2`` and return the result.

        Args:
            x: Input tensor whose last dimension has size 10, as required
                by ``layer1``.

        Returns:
            The output of ``layer2``; its last dimension has size 3.
        """
        hidden = torch.relu(self.layer1(x))
        return self.layer2(hidden)

model = Model()

# Input data. requires_grad=True makes it a leaf tensor that autograd can
# differentiate with respect to.
input_data = torch.randn(1, 10, requires_grad=True)

# The forward pass has to run with gradient tracking enabled. Inside
# torch.no_grad() no graph is recorded, so `output` would have no connection
# to `input_data`; setting output.requires_grad = True afterwards does not
# restore that connection, and torch.autograd.grad() then raises
# "One of the differentiated Tensors appears to not have been used in the
# graph".
output = model(input_data)

# Gradient of sum(output) with respect to the input only. torch.autograd.grad
# returns the requested gradients instead of accumulating them into `.grad`,
# so no gradients are stored on the model parameters.
input_grad = torch.autograd.grad(outputs=output.sum(), inputs=input_data)[0]
print(input_grad)


